########################################################################################################
##                                                                                                    ##
##  Functions required for replicating simulations and analyses in Tsai, Tsung-han. 2014.             ##
##  "A Bayesian Approach to Dynamic Panel Data Models with Rarely Changing Variables."                ##
##                                                                                                    ##
########################################################################################################

## CLEAN UP
rm(list=ls());

########################################################################################################
## GENERATE UNCORRELATED EXPLANATORY VARIABLES: x1, x2, z1, z2;
uncor.var <- function(n.state=20, t=30, mu.x=c(1,0), mu.z=c(2,2), sd.x=c(3,1), sd.z=c(1,2) ) {
	
	# n.state: the number of units
	# t: the number of time points
	# mu.x: mean of unit-level time varying variables x
	# mu.z: mean of unit-level time-invariant variables z
	# sd.x: sd of unit-level time varying variables x
	# sd.z: sd of unit-level time-invariant variables z
	
	n.state <- n.state; t <- t; mu.x <- mu.x; mu.z <- mu.z; sd.x <- sd.x; sd.z <- sd.z;

	## GENERATE THE TIME-VARIANT VARIABLES WITH DIFFERENT MEANS AND VARIANCES ACROSS COUNTRIES AND VARIABLES
	# Variable x1
	mu.x1 <- rnorm(n.state, mean=mu.x[1], sd=sd.x[1]);
	x1 <- matrix(NA, nrow=n.state, ncol=t);
	for (j in 1:n.state) {
		x1[j,] <- rnorm(t, mean=mu.x1[j], sd=1);
	}

	# Variable x2
	mu.x2 <- rnorm(n.state, mean=mu.x[2], sd=sd.x[2]);
	x2 <- matrix(NA, nrow=n.state, ncol=t);
	for (j in 1:n.state) {
		x2[j,] <- rnorm(t, mean=mu.x2[j], sd=0.1);
	}

	## GENERATE SLOWLY CHANGING VARIABLES WITH DIFFERENT MEANS AND VARIANCES ACROSS COUNTRIES AND VARIABLES
	# Variable z1
	z.sample <- sample(0:(t/2), n.state-3, replace=TRUE); # DETERMINE TIME PERIODS THE VALUE PERSISTS
	z.sample <- c(rep(0,3), z.sample); # THE FIRST THREE COUNTRIES ARE TIME-INVARIANT

	mu.z1 <- rnorm(n.state, mean=mu.z[1], sd=sd.z[1]);
	z1 <- matrix(NA, nrow=n.state, ncol=t);
	for (j in 1:n.state) {
		z1[j,] <- rep(c(mu.z1[j], mu.z1[j]+rnorm(1,mean=0,sd=.5)), times=c(z.sample[j], t-z.sample[j]));
	}
	
	# Variable z2
	z.sample <- sample(0:(t/2), n.state-3, replace=TRUE); # DETERMINE TIME PERIODS THE VALUE PERSISTS
	z.sample <- c(z.sample, rep(0,3)); # THE FIRST THREE COUNTRIES ARE TIME-INVARIANT

	mu.z2 <- rnorm(n.state, mean=mu.z[2], sd=sd.z[2]);
	z2 <- matrix(NA, nrow=n.state, ncol=t);
	for (i in 1:n.state) {
		z2[i,] <- rep(c(mu.z2[i], mu.z2[i]+rnorm(1,mean=0,sd=.5)), times=c(z.sample[i], t-z.sample[i]));
	}
		
	return(list("x1"=x1, "x2"=x2, "z1"=z1, "z2"=z2));
}  # END OF uncor.var



########################################################################################################
## GENERATE CORRELATED EXPLANATORY VARIABLES AND UNIT EFFECTS: x3, z3, delta;
cor.var <- function(n.state=20, t=30, mu.delta=0, sd.delta=1, cor.xu=0.8, cor.zu=0.3) {
	
	# n.state: the number of units
	# t: the number of time points
	# mu.delta: mean of unit effects
	# sd.delta: sd of unit effects
	# cor.x: correlation between x3 and unit effects
	# cor.z: correlation between z3 and unit effects
	
	n.state <- n.state; t <- t; mu.delta <- mu.delta; sd.delta <-sd.delta; cor.xu <- cor.xu; cor.zu <- cor.zu;
	
	## GENERATE THE UNIT EFFECTS ACROSS UNITS
	delta <- rnorm(n.state, mean=mu.delta, sd=sd.delta);
	unief <- rep(delta, each=t);

	## GENERATE THE THIRD TIME-VARIANT VARIABLE x3, WHICH IS CORRELATED WITH THE UNIT EFFECTS
	mu.x3 <- corgen(x=delta, r=cor.xu, epsilon=0.01)$y;

	x3 <- matrix(NA, nrow=n.state, ncol=t, byrow=TRUE);
	for (i in 1:length(mu.x3)) {
		x3[i,] <- rnorm(n=t, mean=mu.x3[i], sd=sqrt(sd.delta^2 - ((cor.xu*sd.delta*1)^2)/1 ) );
	}

	t.x3 <- matrix(t(x3), nrow=n.state*t, ncol=1, byrow=FALSE);
	sim.cor.xu <- cor(unief, t.x3);

	# GENERATE THE THIRD SLOWLY CHANGING VARIABLE z3, WHICH IS CORRELATED WITH THE UNIT EFFECTS
	z.sample <- sample(0:(t/2), n.state-3, replace=TRUE); 
	z.sample <- c(rep(0,3), z.sample);

	mu.z3 <- corgen(x=delta, r=cor.zu, epsilon=0.01)$y;

	z3 <- matrix(NA, nrow=n.state, ncol=t);
	for (i in 1:n.state) {
		z3[i,] <- rep(c(mu.z3[i], mu.z3[i]+rnorm(1,mean=1,sd=.5)), times=c(z.sample[i], t-z.sample[i]));
	}

	t.z3 <- matrix(t(z3), nrow=n.state*t, ncol=1)
	sim.cor.zu <- cor(unief, t.z3);

	return(list(x3=x3, z3=z3, xu.corr=sim.cor.xu, zu.corr=sim.cor.zu, delta=delta, mu.x3=mu.x3, mu.z3=mu.z3, unief=unief));
		
}  # END OF cor.var



########################################################################################################
## GENERATE TIME-SERIES CROSS-SECTIONAL DATA SET;
sim.data <- function(n.state=20, t=30, mu=1, phi=0.8, beta=c(0.5,2,-1.5,-2.5,1.8,3), x1, x2, x3, z1, z2, z3, delta) {
	
	# n.state: the number of units
	# t: the number of time points
	# mu: intercept (grand mean)
	# phi: autoregressive coefficient
	# beta: vector of regression coefficients
	# x1, x2, x3: time varying variables
	# z1, z2, z3: slowly changing variables
	# delta: unit specific effects generated by *cor.var* function
	
	n.state <- n.state; t <- t; mu <- mu; phi <-phi; beta <- beta; x1 <- x1; x2 <- x2; x3 <- x3; z1 <- z1; z2 <- z2; z3 <- z3;
	
	# GENERATE THE INITIAL VALUE OF Y BY THE LONG-TERM MEAN AND LONG-TERM VARIANCE
	long.y0 <- mu + delta;

	y0 <- rnorm(n.state, mean=long.y0/(1-phi), sd=sqrt(1/(1-phi^2))); y0;


	# GENERATE OUTCOMES BY SETTING THE DATA GENERATING PROCESS AS WHAT I DEFINE
	y <- matrix(NA, nrow=n.state, ncol=t);

	for (i in 1:n.state) {
		y[i,1] <- mu + phi*y0[i] + beta[1]*x1[i,1] + beta[2]*x2[i,1] + beta[3]*x3[i,1] + beta[4]*z1[i,1] + beta[5]*z2[i,1] + beta[6]*z3[i,1] + delta[i] + rnorm(1, mean=0, sd=1);
		for (t in 2:t) {
			y[i,t] <- mu + phi*y[i,t-1] + beta[1]*x1[i,t] + beta[2]*x2[i,t] + beta[3]*x3[i,t] + beta[4]*z1[i,t] + beta[5]*z2[i,t] + beta[6]*z3[i,t] + delta[i] + rnorm(1, mean=0, sd=1);
		}
	}

	# REARRANGE THE DATA FRAME
	sim.data <- matrix(NA, nrow=n.state*t, ncol=3);
	colnames(sim.data) <- c("country", "year", "y");
	sim.data[,1] <- rep(1:n.state, each=t);
	sim.data[,2] <- rep(1:t, times=n.state);

	# ARRANGE y
	sim.data[,3] <- matrix(t(y), nrow=n.state*t, ncol=1, byrow=FALSE);

	#
	sim.data <- data.frame(sim.data);

	# ARRANGE LAGGED y
	ly <- cbind(y0, y[,-t]);
	sim.data$ly <- matrix(t(ly), nrow=n.state*t, ncol=1, byrow=FALSE);

	# ARRANGE x AND z
	sim.data$x1 <- matrix(t(x1), nrow=n.state*t, ncol=1, byrow=FALSE);
	sim.data$x2 <- matrix(t(x2), nrow=n.state*t, ncol=1, byrow=FALSE);
	sim.data$x3 <- matrix(t(x3), nrow=n.state*t, ncol=1, byrow=FALSE);
	sim.data$z1 <- matrix(t(z1), nrow=n.state*t, ncol=1, byrow=FALSE);
	sim.data$z2 <- matrix(t(z2), nrow=n.state*t, ncol=1, byrow=FALSE);
	sim.data$z3 <- matrix(t(z3), nrow=n.state*t, ncol=1, byrow=FALSE);


	# CENTERING DATA FOR fevd MODEL: SUBTRACT GROUP MEAN
	sim.data$ybar <- rep(rowMeans(y), each=t);
	sim.data$lybar <- rep(rowMeans(ly), each=t);
	sim.data$x1bar <- rep(rowMeans(x1), each=t);
	sim.data$x2bar <- rep(rowMeans(x2), each=t);
	sim.data$x3bar <- rep(rowMeans(x3), each=t);
	sim.data$z1bar <- rep(rowMeans(z1), each=t);
	sim.data$z2bar <- rep(rowMeans(z2), each=t);
	sim.data$z3bar <- rep(rowMeans(z3), each=t);

	sim.data$cy <- sim.data$y-rep(rowMeans(y), each=t);
	sim.data$cly <- sim.data$ly-rep(rowMeans(ly), each=t);
	sim.data$cx1 <- sim.data$x1-rep(rowMeans(x1), each=t);
	sim.data$cx2 <- sim.data$x2-rep(rowMeans(x2), each=t);
	sim.data$cx3 <- sim.data$x3-rep(rowMeans(x3), each=t);
	sim.data$cz1 <- sim.data$z1-rep(rowMeans(z1), each=t);
	sim.data$cz2 <- sim.data$z2-rep(rowMeans(z2), each=t);
	sim.data$cz3 <- sim.data$z3-rep(rowMeans(z3), each=t);

	return(sim.data);

}  # END OF sim.data




###############################################################
## CALCULATE A HIGHEST POSTERIOR DENSITY (HPD) INTERVAL
HPDint <- function(obj, prob = 0.95, ...)
{
    obj <- as.matrix(obj)
    vals <- apply(obj, 2, sort)
    if (!is.matrix(vals)) stop("The object must have n.samp > 1")
    n.samp <- nrow(vals)
    npar <- ncol(vals)
    gap <- max(1, min(n.samp - 1, round(n.samp * prob)))
    init <- 1:(n.samp - gap)
    inds <- apply(vals[init + gap, ,drop=FALSE] - vals[init, ,drop=FALSE], 2, which.min)
    ans <- round(cbind(vals[cbind(inds, 1:npar)], vals[cbind(inds + gap, 1:npar)]), 3)
    dimnames(ans) <- list(colnames(obj), c("lower", "upper"))
    
    output <- list(HPDinterval=ans, Probability=gap/n.samp)
    #print(output)
}


########################################################################################################
## GENERATE CORRELATED EXPLANATORY VARIABLES AND UNIT EFFECTS: x3, z3, delta;
unif.var <- function(n.state=20, t=30, mu.delta=0, sd.delta=1, cor.xu=0.8, cor.zu=0.3, unif.lower=-2, unif.upper=2) {
	
	# n.state: the number of units
	# t: the number of time points
	# mu.delta: mean of unit effects
	# sd.delta: sd of unit effects
	# cor.x: correlation between x3 and unit effects
	# cor.z: correlation between z3 and unit effects
	# unif.lower: the lower bound of uniform distribution
	# unif.upper: the upper bound of uniform distribution
	
	n.state <- n.state; t <- t; mu.delta <- mu.delta; sd.delta <-sd.delta; cor.xu <- cor.xu; cor.zu <- cor.zu;
	unif.lower <- unif.lower; unif.upper <- unif.upper;
	
	## GENERATE THE UNIT EFFECTS ACROSS UNITS
	delta <- runif(n=n.state, min= unif.lower, max= unif.upper);
	unief <- rep(delta, each=t);

	## GENERATE THE THIRD TIME-VARIANT VARIABLE x3, WHICH IS CORRELATED WITH THE UNIT EFFECTS
	mu.x3 <- corgen(x=delta, r=cor.xu, epsilon=0.01)$y;

	x3 <- matrix(NA, nrow=n.state, ncol=t, byrow=TRUE);
	for (i in 1:length(mu.x3)) {
		x3[i,] <- rnorm(n=t, mean=mu.x3[i], sd=sqrt(sd.delta^2 - ((cor.xu*sd.delta*1)^2)/1 ) );
	}

	t.x3 <- matrix(t(x3), nrow=n.state*t, ncol=1, byrow=FALSE);
	sim.cor.xu <- cor(unief, t.x3);

	# GENERATE THE THIRD SLOWLY CHANGING VARIABLE z3, WHICH IS CORRELATED WITH THE UNIT EFFECTS
	z.sample <- sample(0:(t/2), n.state-3, replace=TRUE); 
	z.sample <- c(rep(0,3), z.sample);

	mu.z3 <- corgen(x=delta, r=cor.zu, epsilon=0.01)$y;

	z3 <- matrix(NA, nrow=n.state, ncol=t);
	for (i in 1:n.state) {
		z3[i,] <- rep(c(mu.z3[i], mu.z3[i]+rnorm(1,mean=1,sd=.5)), times=c(z.sample[i], t-z.sample[i]));
	}

	t.z3 <- matrix(t(z3), nrow=n.state*t, ncol=1)
	sim.cor.zu <- cor(unief, t.z3);

	return(list(x3=x3, z3=z3, xu.corr=sim.cor.xu, zu.corr=sim.cor.zu, delta=delta, mu.x3=mu.x3, mu.z3=mu.z3, unief=unief));
		
}  # END OF unif.var



